Import Packages
library("ggplot2")
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('tidyverse')
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v purrr 0.3.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library('geosphere')
library("ggmap")
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
Importing Data
# Reading in the sample CSV of rider data we made
rider_2019_sample <- read.csv('sample.csv', stringsAsFactors = TRUE)
head(rider_2019_sample)
## tripduration starttime stoptime
## 1 564 2019-04-11 07:44:36.0580 2019-04-11 07:54:00.1840
## 2 1158 2019-05-15 18:00:33.3890 2019-05-15 18:19:52.0150
## 3 763 2019-03-25 13:27:50.4260 2019-03-25 13:40:33.7960
## 4 915 2019-06-21 15:52:07.8340 2019-06-21 16:07:23.6810
## 5 1368 2019-06-01 18:42:22.9500 2019-06-01 19:05:11.7510
## 6 267 2019-07-31 18:47:05.5630 2019-07-31 18:51:33.0870
## start.station.id start.station.name start.station.latitude
## 1 3711 E 13 St & Avenue A 40.72967
## 2 3016 Kent Ave & N 7 St 40.72037
## 3 382 University Pl & E 14 St 40.73493
## 4 359 E 47 St & Park Ave 40.75510
## 5 3295 Central Park W & W 96 St 40.79127
## 6 3377 Carroll St & Bond St 40.67861
## start.station.longitude end.station.id end.station.name end.station.latitude
## 1 -73.98068 168 W 18 St & 6 Ave 40.73971
## 2 -73.96165 3016 Kent Ave & N 7 St 40.72037
## 3 -73.99201 459 W 20 St & 11 Ave 40.74674
## 4 -73.97499 483 E 12 St & 3 Ave 40.73223
## 5 -73.96484 3142 1 Ave & E 62 St 40.76123
## 6 -73.99037 3398 Smith St & 9 St 40.67470
## end.station.longitude bikeid usertype birth.year gender
## 1 -73.99456 29807 Subscriber 1994 1
## 2 -73.96165 34411 Subscriber 1974 1
## 3 -74.00776 16078 Subscriber 1961 1
## 4 -73.98890 29904 Subscriber 1964 2
## 5 -73.96094 30247 Customer 1969 0
## 6 -73.99786 20315 Subscriber 1971 1
# Reading in the weather data set
weather_data <- read.csv('NYCWeather2019.csv', stringsAsFactors = TRUE)
head(weather_data)
## STATION NAME DATE AWND PRCP SNOW SNWD TAVG
## 1 USW00094728 NY CITY CENTRAL PARK, NY US 1/1/2019 NA 0.06 0 0 NA
## 2 USW00094728 NY CITY CENTRAL PARK, NY US 1/2/2019 NA 0.00 0 0 NA
## 3 USW00094728 NY CITY CENTRAL PARK, NY US 1/3/2019 NA 0.00 0 0 NA
## 4 USW00094728 NY CITY CENTRAL PARK, NY US 1/4/2019 NA 0.00 0 0 NA
## 5 USW00094728 NY CITY CENTRAL PARK, NY US 1/5/2019 NA 0.50 0 0 NA
## 6 USW00094728 NY CITY CENTRAL PARK, NY US 1/6/2019 NA 0.00 0 0 NA
## TMAX TMIN
## 1 58 39
## 2 40 35
## 3 44 37
## 4 47 35
## 5 47 41
## 6 49 31
Initial Data Summary
# Initial summary of rider data set
str(rider_2019_sample)
## 'data.frame': 100000 obs. of 15 variables:
## $ tripduration : int 564 1158 763 915 1368 267 661 1112 520 512 ...
## $ starttime : Factor w/ 99999 levels "2019-01-01 00:56:30.7720",..: 18803 28405 14066 41002 34169 54789 95279 5247 68397 75686 ...
## $ stoptime : Factor w/ 100000 levels "2019-01-01 01:34:45.0200",..: 18804 28409 14065 41001 34174 54787 95282 5246 68395 75682 ...
## $ start.station.id : Factor w/ 825 levels "116","119","120",..: 621 86 688 538 263 348 749 80 259 545 ...
## $ start.station.name : Factor w/ 894 levels "1 Ave & E 110 St",..: 352 545 760 386 250 234 797 672 440 99 ...
## $ start.station.latitude : num 40.7 40.7 40.7 40.8 40.8 ...
## $ start.station.longitude: num -74 -74 -74 -74 -74 ...
## $ end.station.id : Factor w/ 828 levels "116","119","120",..: 15 86 752 774 184 369 623 27 333 509 ...
## $ end.station.name : Factor w/ 890 levels "1 Ave & E 110 St",..: 793 549 795 350 7 714 787 371 598 92 ...
## $ end.station.latitude : num 40.7 40.7 40.7 40.7 40.8 ...
## $ end.station.longitude : num -74 -74 -74 -74 -74 ...
## $ bikeid : int 29807 34411 16078 29904 30247 20315 40128 33989 29972 20897 ...
## $ usertype : Factor w/ 2 levels "Customer","Subscriber": 2 2 2 2 1 2 1 2 2 2 ...
## $ birth.year : int 1994 1974 1961 1964 1969 1971 1969 1960 1972 1966 ...
## $ gender : int 1 1 1 2 0 1 0 1 1 1 ...
summary(rider_2019_sample)
## tripduration starttime
## Min. : 61.0 2019-11-22 17:59:58.4760: 2
## 1st Qu.: 362.0 2019-01-01 00:56:30.7720: 1
## Median : 614.0 2019-01-01 01:35:30.5010: 1
## Mean : 950.8 2019-01-01 02:04:41.7180: 1
## 3rd Qu.: 1075.0 2019-01-01 02:25:28.9700: 1
## Max. :2769536.0 2019-01-01 02:33:50.6550: 1
## (Other) :99993
## stoptime start.station.id
## 2019-01-01 01:34:45.0200: 1 519 : 810
## 2019-01-01 01:51:55.8730: 1 3255 : 617
## 2019-01-01 02:13:13.4810: 1 497 : 602
## 2019-01-01 02:29:13.1090: 1 402 : 561
## 2019-01-01 03:04:23.8640: 1 435 : 551
## 2019-01-01 04:09:48.6020: 1 (Other):96523
## (Other) :99994 NA's : 336
## start.station.name start.station.latitude start.station.longitude
## Pershing Square North: 810 Min. :40.66 Min. :-74.03
## 8 Ave & W 31 St : 617 1st Qu.:40.72 1st Qu.:-74.00
## E 17 St & Broadway : 602 Median :40.74 Median :-73.98
## Broadway & E 22 St : 561 Mean :40.74 Mean :-73.98
## W 21 St & 6 Ave : 551 3rd Qu.:40.76 3rd Qu.:-73.97
## Broadway & E 14 St : 548 Max. :40.85 Max. :-73.88
## (Other) :96311
## end.station.id end.station.name end.station.latitude
## 519 : 792 Pershing Square North: 792 Min. :40.66
## 402 : 636 Broadway & E 22 St : 636 1st Qu.:40.72
## 3255 : 632 8 Ave & W 31 St : 632 Median :40.74
## 497 : 623 E 17 St & Broadway : 623 Mean :40.74
## 285 : 547 Broadway & E 14 St : 547 3rd Qu.:40.76
## (Other):96426 W 21 St & 6 Ave : 544 Max. :40.86
## NA's : 344 (Other) :96226
## end.station.longitude bikeid usertype birth.year
## Min. :-74.03 Min. :14529 Customer :14054 Min. :1885
## 1st Qu.:-74.00 1st Qu.:25346 Subscriber:85946 1st Qu.:1970
## Median :-73.99 Median :30918 Median :1983
## Mean :-73.98 Mean :29674 Mean :1980
## 3rd Qu.:-73.97 3rd Qu.:35049 3rd Qu.:1990
## Max. :-73.89 Max. :42046 Max. :2003
##
## gender
## Min. :0.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.161
## 3rd Qu.:1.000
## Max. :2.000
##
# Initial summart of weather data set
str(weather_data)
## 'data.frame': 365 obs. of 10 variables:
## $ STATION: Factor w/ 1 level "USW00094728": 1 1 1 1 1 1 1 1 1 1 ...
## $ NAME : Factor w/ 1 level "NY CITY CENTRAL PARK, NY US": 1 1 1 1 1 1 1 1 1 1 ...
## $ DATE : Factor w/ 365 levels "1/1/2019","1/10/2019",..: 1 12 23 26 27 28 29 30 31 2 ...
## $ AWND : num NA NA NA NA NA NA NA NA NA NA ...
## $ PRCP : num 0.06 0 0 0 0.5 0 0 0.17 0.06 0 ...
## $ SNOW : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SNWD : num 0 0 0 0 0 0 0 0 0 0 ...
## $ TAVG : logi NA NA NA NA NA NA ...
## $ TMAX : int 58 40 44 47 47 49 34 45 45 34 ...
## $ TMIN : int 39 35 37 35 41 31 25 34 34 28 ...
summary(rider_2019_sample)
## tripduration starttime
## Min. : 61.0 2019-11-22 17:59:58.4760: 2
## 1st Qu.: 362.0 2019-01-01 00:56:30.7720: 1
## Median : 614.0 2019-01-01 01:35:30.5010: 1
## Mean : 950.8 2019-01-01 02:04:41.7180: 1
## 3rd Qu.: 1075.0 2019-01-01 02:25:28.9700: 1
## Max. :2769536.0 2019-01-01 02:33:50.6550: 1
## (Other) :99993
## stoptime start.station.id
## 2019-01-01 01:34:45.0200: 1 519 : 810
## 2019-01-01 01:51:55.8730: 1 3255 : 617
## 2019-01-01 02:13:13.4810: 1 497 : 602
## 2019-01-01 02:29:13.1090: 1 402 : 561
## 2019-01-01 03:04:23.8640: 1 435 : 551
## 2019-01-01 04:09:48.6020: 1 (Other):96523
## (Other) :99994 NA's : 336
## start.station.name start.station.latitude start.station.longitude
## Pershing Square North: 810 Min. :40.66 Min. :-74.03
## 8 Ave & W 31 St : 617 1st Qu.:40.72 1st Qu.:-74.00
## E 17 St & Broadway : 602 Median :40.74 Median :-73.98
## Broadway & E 22 St : 561 Mean :40.74 Mean :-73.98
## W 21 St & 6 Ave : 551 3rd Qu.:40.76 3rd Qu.:-73.97
## Broadway & E 14 St : 548 Max. :40.85 Max. :-73.88
## (Other) :96311
## end.station.id end.station.name end.station.latitude
## 519 : 792 Pershing Square North: 792 Min. :40.66
## 402 : 636 Broadway & E 22 St : 636 1st Qu.:40.72
## 3255 : 632 8 Ave & W 31 St : 632 Median :40.74
## 497 : 623 E 17 St & Broadway : 623 Mean :40.74
## 285 : 547 Broadway & E 14 St : 547 3rd Qu.:40.76
## (Other):96426 W 21 St & 6 Ave : 544 Max. :40.86
## NA's : 344 (Other) :96226
## end.station.longitude bikeid usertype birth.year
## Min. :-74.03 Min. :14529 Customer :14054 Min. :1885
## 1st Qu.:-74.00 1st Qu.:25346 Subscriber:85946 1st Qu.:1970
## Median :-73.99 Median :30918 Median :1983
## Mean :-73.98 Mean :29674 Mean :1980
## 3rd Qu.:-73.97 3rd Qu.:35049 3rd Qu.:1990
## Max. :-73.89 Max. :42046 Max. :2003
##
## gender
## Min. :0.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.161
## 3rd Qu.:1.000
## Max. :2.000
##
Initial Data Analysis
Rider Age
rider_2019_sample$age <- 2019 - as.numeric(as.character(rider_2019_sample$birth.year))
rider_2019_sample <- filter(rider_2019_sample, age <= 80)
Gender Split in Riders
# Reclassifying the genders
# 0=unknown, 1=male, 2=female
rider_2019_sample$gender <- ifelse(rider_2019_sample$gender == 0, "Unkown",
ifelse(rider_2019_sample$gender == 1, "Male", "Female"))
# Seeing the split of genders who rented bikes
rider_2019_sample %>%
ggplot(aes(x=gender)) +
geom_bar()

Subscriber vs Customer for Riders
# Seeing the split of user type who rented bikes
rider_2019_sample %>%
ggplot(aes(x=usertype)) +
geom_bar()

Trip Duration
# Range of all bike rides
rider_2019_sample <- filter(rider_2019_sample, tripduration <= 3000)
duration_range <- range(rider_2019_sample$tripduration, na.rm=TRUE)
duration_range
## [1] 61 3000
# Average length of a bike ride
duration_mean <- mean(rider_2019_sample$tripduration, na.rm=TRUE)
duration_mean
## [1] 776.546
# Standard deviation of bike rides
duration_sd <- sd(rider_2019_sample$tripduration, na.rm=TRUE)
duration_sd
## [1] 561.1764
Adjusting Dates in Data Sets
# Creating columns of just month, day, and year
weather_data$DATE <- as.Date(weather_data$DATE, format = "%m/%d/%Y")
weather_data$Month <- format(weather_data$DATE, "%m")
weather_data$Day <- format(weather_data$DATE, "%d")
weather_data$Year <- format(weather_data$DATE, "%Y")
# Creating columns of just month, day, and year
rider_2019_sample$DATE <- as.Date(rider_2019_sample$starttime, format = "%Y-%m-%d")
rider_2019_sample$Month <- format(rider_2019_sample$DATE, "%m")
rider_2019_sample$Day <- format(rider_2019_sample$DATE, "%d")
rider_2019_sample$Year <- format(rider_2019_sample$DATE, "%Y")
Types of Weather per Month
# Average precipitation per month
weather_data %>%
summarise(average_precip = tapply(PRCP, Month, mean, na.rm=TRUE))
## average_precip
## 1 0.11548387
## 2 0.11214286
## 3 0.12483871
## 4 0.15166667
## 5 0.22000000
## 6 0.18200000
## 7 0.18612903
## 8 0.11935484
## 9 0.03166667
## 10 0.19838710
## 11 0.06500000
## 12 0.22870968
# Average snow per month
weather_data %>%
summarise(avg_snow = tapply(SNOW, Month, mean, na.rm=TRUE))
## avg_snow
## 1 0.03548387
## 2 0.09285714
## 3 0.33548387
## 4 0.00000000
## 5 0.00000000
## 6 0.00000000
## 7 0.00000000
## 8 0.00000000
## 9 0.00000000
## 10 0.00000000
## 11 0.00000000
## 12 0.08064516
# Average wind speed per month
weather_data %>%
summarise(average_wind_speed = tapply(AWND, Month, mean, na.rm=TRUE))
## average_wind_speed
## 1 NaN
## 2 NaN
## 3 5.326667
## 4 4.399667
## 5 3.932581
## 6 4.159667
## 7 3.463226
## 8 3.839032
## 9 4.302333
## 10 5.288710
## 11 5.673667
## 12 6.385806
Exploratory Data Analysis - Weather Effects
# Combining data frames to compare data
edited_weather <- select(weather_data, PRCP, SNOW, AWND, DATE)
edited_rider <- select(rider_2019_sample, age, tripduration, DATE)
total_data = merge(edited_weather, edited_rider, by.x="DATE", by.y="DATE", all.x=TRUE)
head(total_data)
## DATE PRCP SNOW AWND age tripduration
## 1 2019-01-01 0.06 0 NA 35 123
## 2 2019-01-01 0.06 0 NA 34 2757
## 3 2019-01-01 0.06 0 NA 26 208
## 4 2019-01-01 0.06 0 NA 50 1496
## 5 2019-01-01 0.06 0 NA 31 511
## 6 2019-01-01 0.06 0 NA 46 319
Average Precipitation by Age
# Mean PRCP by Age of Rider
total_data %>%
group_by(age) %>%
summarise(mean_PRCP_by_age = mean(PRCP)) %>%
ggplot(aes(x = age, y = mean_PRCP_by_age)) + geom_line() + geom_smooth()
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Average Wind by Age
# Mean Wind by Age of Rider
total_data %>%
group_by(age) %>%
summarise(mean_AWND_by_age = mean(AWND,na.rm = TRUE)) %>%
ggplot(aes(x = age, y = mean_AWND_by_age)) + geom_line() + geom_smooth()
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Precipitation Effects on Trip Duration
# Average ride time when it's raining
total_data %>%
filter(PRCP > 0) %>%
summarise(prcp_duration_mean = mean(tripduration))
## prcp_duration_mean
## 1 759.4454
total_data %>%
filter(PRCP > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram(aes(y=..density..), colour="black", fill="white") +
geom_density(alpha=.2, fill="#FF6666")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

total_data %>%
filter(PRCP > 0) %>%
ggplot(aes(x = tripduration)) +
geom_density(aes(fill=factor(PRCP)), alpha=0.8)

total_data %>%
filter(PRCP > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Snow Effects on Trip Duration
# Average ride time when it's snowing
total_data %>%
filter(SNOW > 0) %>%
summarise(snow_duration_mean = mean(tripduration))
## snow_duration_mean
## 1 655.9662
total_data %>%
filter(SNOW > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram(aes(y=..density..), colour="black", fill="white") +
geom_density(alpha=.2, fill="#FF6666")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

total_data %>%
filter(SNOW > 0) %>%
ggplot(aes(x = tripduration)) +
geom_density(aes(fill=factor(SNOW)), alpha=0.8)

total_data %>%
filter(SNOW > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Wind Effects on Trip Duration
# Average ride time when it's windy
total_data %>%
filter(AWND > 0) %>%
summarise(wind_duration_mean = mean(tripduration))
## wind_duration_mean
## 1 790.543
total_data %>%
filter(AWND > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram(aes(y=..density..), colour="black", fill="white") +
geom_density(alpha=.2, fill="#FF6666")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

total_data %>%
filter(AWND > 0) %>%
ggplot(aes(x = tripduration)) +
geom_density(aes(fill=factor(AWND)), alpha=0.8)

total_data %>%
filter(AWND > 0) %>%
ggplot(aes(x = tripduration)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Exploratory Data Analysis - Ride History
Distance Between Stations
# Distance between start and end station in Meters
rider_2019_sample <- mutate(rider_2019_sample,
distance = distHaversine(cbind(rider_2019_sample$start.station.longitude,
rider_2019_sample$start.station.latitude),
cbind(rider_2019_sample$end.station.longitude,
rider_2019_sample$end.station.latitude)))
head(rider_2019_sample)
## tripduration starttime stoptime
## 1 564 2019-04-11 07:44:36.0580 2019-04-11 07:54:00.1840
## 2 1158 2019-05-15 18:00:33.3890 2019-05-15 18:19:52.0150
## 3 763 2019-03-25 13:27:50.4260 2019-03-25 13:40:33.7960
## 4 915 2019-06-21 15:52:07.8340 2019-06-21 16:07:23.6810
## 5 1368 2019-06-01 18:42:22.9500 2019-06-01 19:05:11.7510
## 6 267 2019-07-31 18:47:05.5630 2019-07-31 18:51:33.0870
## start.station.id start.station.name start.station.latitude
## 1 3711 E 13 St & Avenue A 40.72967
## 2 3016 Kent Ave & N 7 St 40.72037
## 3 382 University Pl & E 14 St 40.73493
## 4 359 E 47 St & Park Ave 40.75510
## 5 3295 Central Park W & W 96 St 40.79127
## 6 3377 Carroll St & Bond St 40.67861
## start.station.longitude end.station.id end.station.name end.station.latitude
## 1 -73.98068 168 W 18 St & 6 Ave 40.73971
## 2 -73.96165 3016 Kent Ave & N 7 St 40.72037
## 3 -73.99201 459 W 20 St & 11 Ave 40.74674
## 4 -73.97499 483 E 12 St & 3 Ave 40.73223
## 5 -73.96484 3142 1 Ave & E 62 St 40.76123
## 6 -73.99037 3398 Smith St & 9 St 40.67470
## end.station.longitude bikeid usertype birth.year gender age DATE
## 1 -73.99456 29807 Subscriber 1994 Male 25 2019-04-11
## 2 -73.96165 34411 Subscriber 1974 Male 45 2019-05-15
## 3 -74.00776 16078 Subscriber 1961 Male 58 2019-03-25
## 4 -73.98890 29904 Subscriber 1964 Female 55 2019-06-21
## 5 -73.96094 30247 Customer 1969 Unkown 50 2019-06-01
## 6 -73.99786 20315 Subscriber 1971 Male 48 2019-07-31
## Month Day Year distance
## 1 04 11 2019 1619.3162
## 2 05 15 2019 0.0000
## 3 03 25 2019 1869.6579
## 4 06 21 2019 2803.2652
## 5 06 01 2019 3360.4376
## 6 07 31 2019 767.6646
Speed of Rider Demographics
# Speed of the rider
rider_2019_sample$speed <- rider_2019_sample$distance/rider_2019_sample$tripduration
# Average speed of all riders
rider_2019_sample %>%
summarise(average_speed = mean(speed))
## average_speed
## 1 2.470045
# Average speed of young riders
rider_2019_sample %>%
filter(age <= 45) %>%
summarise(young_average = mean(speed))
## young_average
## 1 2.544806
# Average speed of old riders
rider_2019_sample %>%
filter(age >= 65) %>%
summarise(old_average = mean(speed))
## old_average
## 1 2.193164
# Average speed of female riders
rider_2019_sample %>%
filter(gender == "Female") %>%
summarise(female_average = mean(speed))
## female_average
## 1 2.331995
# Average speed of male riders
rider_2019_sample %>%
filter(gender == "Male") %>%
summarise(male_average = mean(speed))
## male_average
## 1 2.577479
# Average speed of subscribers
rider_2019_sample %>%
filter(usertype == "Customer") %>%
summarise(customer_average = mean(speed))
## customer_average
## 1 1.820502
# Average speed of customers
rider_2019_sample %>%
filter(usertype == "Subscriber") %>%
summarise(subscriber_average = mean(speed))
## subscriber_average
## 1 2.569449
# Scatter Plot of speed by age
rider_2019_sample %>%
ggplot(aes(x = age, y = speed)) +
geom_point(alpha = .25, color = 'blue', size = 1) +
geom_point(shape = 1, size = 1, colour = "black") +
labs(title="Average Speed of Riders by Age", x="Speed", y="Age")

# Boxplot of speed by gender
rider_2019_sample %>%
ggplot(aes(x = gender, y = speed)) +
geom_boxplot() +
labs(title="Speed of Riders by Gender", x="Gender", y="Speed")

# Boxplot of speed by customer type
rider_2019_sample %>%
ggplot(aes(x = usertype, y = speed)) +
geom_boxplot() +
labs(title="Speed of Riders by Customer Type", x="Customer Type", y="Speed")

Start Locations
top_height <- max(rider_2019_sample$start.station.latitude) - min(rider_2019_sample$start.station.latitude)
top_width <- max(rider_2019_sample$start.station.longitude) - min(rider_2019_sample$start.station.longitude)
top_borders <- c(bottom = min(rider_2019_sample$start.station.latitude) - 0.1 * top_height,
top = max(rider_2019_sample$start.station.latitude) + 0.1 * top_height,
left = min(rider_2019_sample$start.station.longitude) - 0.2 * top_width,
right = max(rider_2019_sample$start.station.longitude) + 0.2 * top_width)
start <- get_stamenmap(top_borders, zoom = 12, maptype = "toner-lite")
## Source : http://tile.stamen.com/toner-lite/12/1205/1537.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1537.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1537.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1541.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1541.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1541.png
start_map <- ggmap(start, extent = "device", legend = "topright")
start_map + stat_density2d(
aes(x = start.station.longitude, y = start.station.latitude, fill = ..level.., alpha = ..level..),
size = 1, bins = 5, data = rider_2019_sample,
geom = "polygon"
)

Start Location Preferences - by Day of Week
# convert dates to weekdays
rider_2019_sample$day_of_week = weekdays(rider_2019_sample$DATE)
start_map +
stat_density2d(
aes(x = start.station.longitude, y = start.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ day_of_week)

Start Location Preferences - by Gender
start_map +
stat_density2d(
aes(x = start.station.longitude, y = start.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ gender)

Start Location Preferences - by Customer Type
start_map +
stat_density2d(
aes(x = start.station.longitude, y = start.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ usertype)

Start Location Preferences - by Trip Duration
ggmap(start) +
geom_point(data = rider_2019_sample, mapping = aes(x = start.station.longitude, y = start.station.latitude,
col = tripduration)) +
scale_color_gradient(low = "yellow", high = "red")

End Location Preferences
end <- get_stamenmap(top_borders, zoom = 12, maptype = "toner-lite")
end_map <- ggmap(end, extent = "device", legend = "topright")
end_map + stat_density2d(
aes(x = end.station.longitude, y = end.station.latitude, fill = ..level.., alpha = ..level..),
size = 1, bins = 5, data = rider_2019_sample,
geom = "polygon"
)

End Location Preferences - by Day of Week
end_map +
stat_density2d(
aes(x = end.station.longitude, y = end.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ day_of_week)

End Location Preferences - by Gender
end_map +
stat_density2d(
aes(x = end.station.longitude, y = end.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ gender)

End Location Preferences - by User Type
end_map +
stat_density2d(
aes(x = end.station.longitude, y = end.station.latitude, fill = ..level.., alpha = ..level..),
bins = 5, geom = "polygon",
data = rider_2019_sample) +
scale_fill_gradient(low = "black", high = "red") +
facet_wrap(~ usertype)
